import seaborn as sns
df = sns.load_dataset('tips')
df
| total_bill | tip | sex | smoker | day | time | size | |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
Goal. Predict tips based on total bills using a function: $$f(\text{total bill}) = a\cdot (\text{total bill}) + b$$
We want to find values of the parameters $a$ and $b$ such that the value
$$C(a, b) = \sum_i \left(y^{(i)} - f(x^{(i)})\right)^2$$ is as small as possible. Note that $x^{(i)}$ and $y^{(i)}$ are known (they come from the data), and the parameters $a, b$ are unknown. The function $C(a, b)$ is called the cost function.
From the course website:
from ipywidgets import interact, fixed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
sns.set_context("notebook")
df = sns.load_dataset('tips')
def tip_plot(a, b):
    '''
    Plots tips against total bills together with the line y = a*x + b.

    The title of the figure shows a, b and the cost C(a, b), i.e. the sum
    of squared differences between the values a*total_bill + b and the
    actual tips stored in the dataframe df.

    a, b:
        Slope and intercept of the line.
    '''
    residuals = (a*df['total_bill'] + b) - df['tip']
    cost = (residuals**2).sum()

    plt.figure(figsize=(12,7))
    sns.scatterplot(data=df, x="total_bill", y="tip", marker='o')

    # Draw the candidate line over the integer bill values 0, 1, ..., 54.
    bills = np.arange(0, 55)
    plt.plot(bills, a*bills + b, c='b', lw=5, alpha=0.5)

    plt.ylim(0, 11)
    title = f"a={a:.2f} b={b:.2f} C(a, b)={cost:.2f}"
    plt.title(title, fontdict={'fontsize':20})
    plt.show()
interact(tip_plot, a=(0.1, 0.2, 0.01), b=(0.0, 1.0, 0.1));
interactive(children=(FloatSlider(value=0.15000000000000002, description='a', max=0.2, min=0.1, step=0.01), Fl…
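Gradient descent is a method of finding a (local) minimum of a function $f$.
Algorithm: choose a starting point $x_0$ and a learning rate $\alpha > 0$, then repeatedly update $x_{n+1} = x_n - \alpha\,\nabla f(x_n)$ for a fixed number of steps.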
From the course website:
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def descent(Df, x0, l_rate=0.1, nsteps=1000):
    '''
    Runs a fixed number of gradient descent steps.

    Df:
        Function returning the gradient (differential) of f at a point.
        Its result is converted to a numpy array.
    x0:
        The starting point.
    l_rate:
        The learning rate: each step moves by -l_rate times the gradient.
    nsteps:
        Number of iterations to run.

    Returns:
        A list of nsteps + 1 numpy arrays: the starting point (as a float
        array) followed by the point obtained after each step.
    '''
    path = [np.array(x0, dtype='float')]
    for _ in range(nsteps):
        here = path[-1]
        # Build a new array on every step so earlier path entries are never
        # modified in place.
        path.append(here - l_rate*np.array(Df(here)))
    return path
def plot_descent(f, xlim, ylim, path=None, levels=20):
    '''
    Draws a filled contour plot of a function and, optionally, the path
    computed by gradient descent applied to that function.

    f:
        Function to be plotted. It is called once with a 2-row array whose
        rows hold the x- and y-coordinates of all grid points.
    xlim, ylim:
        Tuples with limits of x- and y-values for the contour plot.
    path:
        List of points computed by the gradient descent algorithm;
        nothing is drawn on top of the contours when it is None.
    levels:
        Specifies levels of the contour plot.
    '''
    plt.figure(figsize=(8, 8))

    # 1000 x 1000 grid covering the requested rectangle.
    grid_x, grid_y = np.meshgrid(np.linspace(*xlim, 1000),
                                 np.linspace(*ylim, 1000))
    points = np.vstack([grid_x.ravel(), grid_y.ravel()])
    Z = f(points).reshape(grid_x.shape)

    plt.contourf(grid_x, grid_y, Z, levels=levels, cmap='bone')
    plt.contour(grid_x, grid_y, Z, levels=levels, colors='gray')

    if path is not None:
        path_x = [pt[0] for pt in path]
        path_y = [pt[1] for pt in path]
        plt.plot(path_x, path_y, 'ro-', ms=4)
    plt.show()
def plot_descent_step(f, xlim, ylim, path=None, levels=20, last=None, step=1):
    '''
    Plots contours of a function with a thinned-out part of a descent path.

    f, xlim, ylim, levels:
        Passed unchanged to plot_descent.
    path:
        List of points computed by gradient descent, or None to plot the
        contours only.
    last:
        Only path[:last] is shown (None shows the path up to its end).
    step:
        Only every step-th point of the path is shown.
    '''
    # The default path=None cannot be sliced; pass it through so that the
    # function works with its own defaults instead of raising TypeError.
    shown = None if path is None else path[:last:step]
    plot_descent(f=f,
                 xlim=xlim,
                 ylim=ylim,
                 path=shown,
                 levels=levels)
def plot3d(f, xlim, ylim):
    '''
    Shows an interactive plotly surface plot of a function of two variables.

    f:
        Function to be plotted. It is called once with an array stacking
        the x- and y-coordinate grids.
    xlim, ylim:
        Tuples with limits of x- and y-values; each range is sampled
        at 400 points.
    '''
    X, Y = np.meshgrid(np.linspace(xlim[0], xlim[1], 400),
                       np.linspace(ylim[0], ylim[1], 400))
    Z = f(np.array([X, Y]))
    surface = go.Surface(x=X, y=Y, z=Z, colorscale="picnic")
    fig = go.Figure(surface)
    fig.update_layout(autosize=False, width=800, height=600)
    fig.show()
Example 1. Gradient descent for $p(x, y) = ax^2 + by^2$.
a = 2
b = 1
def p(x):
    '''
    Quadratic function p(x, y) = a*x^2 + b*y^2.

    The coefficients a and b are read from the global variables
    each time the function is called.
    '''
    x_term = a*x[0]**2
    y_term = b*x[1]**2
    return x_term + y_term
plot3d(p, (-5, 5), (-5, 5))
def Dp(x):
    '''
    Gradient of p(x, y) = a*x^2 + b*y^2, returned as the tuple
    (2*a*x, 2*b*y). Uses the global coefficients a and b.
    '''
    dp_dx = 2*a*x[0]
    dp_dy = 2*b*x[1]
    return (dp_dx, dp_dy)
path = descent(Dp, x0=[2,-3], l_rate=0.1, nsteps=50)
plot_descent(p,
xlim=(-5, 5),
ylim=(-5, 5),
path=path,
levels=40)
path[-1]
array([ 1.61656255e-11, -4.28174308e-05])
Note. This is a good example to illustrate why before using gradient descent to compute regression of data we need to normalize data features. In the function above if values of the parameters a and b are close to each other, gradient descent converges quickly. Convergence is much slower though if one of these parameters is much larger than the other:
a = 50
b = 1
plot3d(p, (-5, 5), (-5, 5))
a = 50
b = 1
path = descent(Dp, x0=[2,4], l_rate=0.015, nsteps=1000)
plot_descent(p,
xlim=(-5, 5),
ylim=(-5, 5),
path=path,
levels=40)
path[-1]
array([1.86652724e-301, 2.36479913e-013])
From the course website:
def h(x):
    '''
    Himmelblau's function
    h(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2

    x:
        Indexable pair: x[0] is the x-coordinate, x[1] the y-coordinate.
    '''
    first = x[0]**2 + x[1] - 11
    second = x[0] + x[1]**2 - 7
    return first**2 + second**2
def Dh(x):
    '''
    Gradient of Himmelblau's function
    h(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2
    returned as a numpy array [dh/dx, dh/dy].
    '''
    first = x[0]**2 + x[1] - 11
    second = x[0] + x[1]**2 - 7
    # Chain rule applied to each squared term.
    dh_dx = 2 * first * 2 * x[0] + 2 * second
    dh_dy = 2 * first + 2 * second * 2 * x[1]
    return np.array([dh_dx, dh_dy])
def r(x):
    '''
    Rosenbrock function
    r(x, y) = (1-x)^2 + 100(y-x^2)^2

    x:
        Indexable pair: x[0] is the x-coordinate, x[1] the y-coordinate.
    '''
    offset = 1 - x[0]
    valley = x[1] - x[0]**2
    return offset**2 + 100*valley**2
def Dr(x):
    '''
    Gradient of the Rosenbrock function
    r(x, y) = (1-x)^2 + 100(y-x^2)^2
    returned as a numpy array [dr/dx, dr/dy].
    '''
    valley = x[1] - x[0]**2
    dr_dx = -2*(1-x[0]) - 400*valley*x[0]
    dr_dy = 200*valley
    return np.array([dr_dx, dr_dy])
Himmelblau function:
plot3d(h, (-5, 5), (-5, 5))
path = descent(Dh, x0=[-1,1], l_rate=0.01)
plot_descent(h,
xlim=(-5, 5),
ylim=(-5, 5),
path=path,
levels=np.exp(np.linspace(-7, 6.5, 40)) - 2)
Rosenbrock function:
plot3d(r, (-1.4, 1.4), (-0.5, 1.4))
path = descent(Dr, x0=[-0.5,1], nsteps=20000, l_rate=0.001)
plot_descent(r,
xlim=(-1.4, 1.4),
ylim=(-0.5, 1.4),
path=path,
levels= np.exp(np.linspace(-7, 6.5, 40)) - 2)
sklearn
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
df
| total_bill | tip | sex | smoker | day | time | size | |
|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
reg.fit(df[['total_bill']], df['tip'])
LinearRegression()
reg.coef_, reg.intercept_
(array([0.10502452]), 0.9202696135546731)
plt.figure(figsize=(12,7))
sns.scatterplot(data=df, x="total_bill", y="tip", marker='o')
x = np.arange(0, 55)
plt.plot(x, reg.coef_*x + reg.intercept_, c='b')
plt.show()
reg.predict(df[['total_bill']])[:10]
array([2.70463616, 2.00622312, 3.12683472, 3.40725019, 3.5028225 ,
3.57633966, 1.84133463, 3.74332864, 2.49983836, 2.47253198])
df['regression'] = reg.predict(df[['total_bill']])
df
| total_bill | tip | sex | smoker | day | time | size | regression | |
|---|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 2.704636 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 2.006223 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 3.126835 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 3.407250 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 3.502822 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 | 3.969131 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 | 3.774836 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 | 3.301175 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 | 2.791807 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 | 2.892630 |
244 rows × 8 columns
df['regression_error'] = df['regression'] - df['tip']
df
| total_bill | tip | sex | smoker | day | time | size | regression | regression_error | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 2.704636 | 1.694636 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 2.006223 | 0.346223 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 3.126835 | -0.373165 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 3.407250 | 0.097250 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 3.502822 | -0.107178 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 | 3.969131 | -1.950869 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 | 3.774836 | 1.774836 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 | 3.301175 | 1.301175 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 | 2.791807 | 1.041807 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 | 2.892630 | -0.107370 |
244 rows × 9 columns
df[['regression_error']].describe()
| regression_error | |
|---|---|
| count | 2.440000e+02 |
| mean | -5.223508e-16 |
| std | 1.019943e+00 |
| min | -3.743435e+00 |
| 25% | -4.863111e-01 |
| 50% | 9.744499e-02 |
| 75% | 5.651615e-01 |
| max | 3.198225e+00 |
np.abs(df[['regression_error']]).describe()
| regression_error | |
|---|---|
| count | 244.000000 |
| mean | 0.745825 |
| std | 0.694074 |
| min | 0.002632 |
| 25% | 0.276832 |
| 50% | 0.541028 |
| 75% | 0.999040 |
| max | 3.743435 |
total_bill and size
reg2 = LinearRegression()
reg2.fit(df[['total_bill', 'size']], df['tip'])
# Show the parameters of reg2. This cell previously displayed reg.intercept_
# (the intercept of the single-feature model), so the intercept in the
# recorded output below is stale until the cell is re-run.
reg2.coef_, reg2.intercept_
(array([0.09271334, 0.19259779]), 0.9202696135546731)
df['regression_2'] = reg2.predict(df[['total_bill', 'size']])
df
| total_bill | tip | sex | smoker | day | time | size | tip_fraction | naive_tip_prediction | naive_prediction_error | regression | regression_error | regression_2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 | 2.732036 | 1.722036 | 2.704636 | 1.694636 | 2.629340 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 | 1.662699 | 0.002699 | 2.006223 | 0.346223 | 2.205394 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 | 3.378462 | -0.121538 | 3.126835 | -0.373165 | 3.194645 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 | 3.807805 | 0.497805 | 3.407250 | 0.097250 | 3.249592 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 | 3.954135 | 0.344135 | 3.502822 | -0.107178 | 3.719157 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 | 0.203927 | 4.668099 | -1.251901 | 3.969131 | -1.950869 | 3.938206 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 | 0.073584 | 4.370614 | 2.370614 | 3.774836 | 1.774836 | 3.574089 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 | 0.088222 | 3.645395 | 1.645395 | 3.301175 | 1.301175 | 3.155952 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 | 0.098204 | 2.865502 | 1.115502 | 2.791807 | 1.041807 | 2.706292 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 | 0.159744 | 3.019872 | 0.019872 | 2.892630 | -0.107370 | 2.795297 |
244 rows × 13 columns
df['regression_2 error'] = df['regression_2'] - df['tip']
np.abs(df[['regression_error', 'regression_2 error']]).describe()
| regression_error | regression_2 error | |
|---|---|---|
| count | 244.000000 | 244.000000 |
| mean | 0.745825 | 0.739004 |
| std | 0.694074 | 0.685833 |
| min | 0.002632 | 0.002048 |
| 25% | 0.276832 | 0.273011 |
| 50% | 0.541028 | 0.536010 |
| 75% | 0.999040 | 0.959264 |
| max | 3.743435 | 4.042497 |
Upshot: Adding size data does not seem to meaningfully improve the fit of the model.
reg.score(df[['total_bill']], df['tip'])
0.45661658635167657
reg2.score(df[['total_bill', 'size']], df['tip'])
0.46786930879612587
total_bill and day:
Create dummy variables for the day column:
dummies = pd.get_dummies(df['day'])
dummies
| Thur | Fri | Sat | Sun | |
|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 |
| 1 | 0 | 0 | 0 | 1 |
| 2 | 0 | 0 | 0 | 1 |
| 3 | 0 | 0 | 0 | 1 |
| 4 | 0 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... |
| 239 | 0 | 0 | 1 | 0 |
| 240 | 0 | 0 | 1 | 0 |
| 241 | 0 | 0 | 1 | 0 |
| 242 | 0 | 0 | 1 | 0 |
| 243 | 1 | 0 | 0 | 0 |
244 rows × 4 columns
df = pd.concat([df, dummies], axis=1)
df
| total_bill | tip | sex | smoker | day | time | size | tip_fraction | naive_tip_prediction | naive_prediction_error | regression | regression_error | regression_2 | regression_2 error | Thur | Fri | Sat | Sun | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 | 2.732036 | 1.722036 | 2.704636 | 1.694636 | 2.629340 | 1.619340 | 0 | 0 | 0 | 1 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 | 1.662699 | 0.002699 | 2.006223 | 0.346223 | 2.205394 | 0.545394 | 0 | 0 | 0 | 1 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 | 3.378462 | -0.121538 | 3.126835 | -0.373165 | 3.194645 | -0.305355 | 0 | 0 | 0 | 1 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 | 3.807805 | 0.497805 | 3.407250 | 0.097250 | 3.249592 | -0.060408 | 0 | 0 | 0 | 1 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 | 3.954135 | 0.344135 | 3.502822 | -0.107178 | 3.719157 | 0.109157 | 0 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 | 0.203927 | 4.668099 | -1.251901 | 3.969131 | -1.950869 | 3.938206 | -1.981794 | 0 | 0 | 1 | 0 |
| 240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 | 0.073584 | 4.370614 | 2.370614 | 3.774836 | 1.774836 | 3.574089 | 1.574089 | 0 | 0 | 1 | 0 |
| 241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 | 0.088222 | 3.645395 | 1.645395 | 3.301175 | 1.301175 | 3.155952 | 1.155952 | 0 | 0 | 1 | 0 |
| 242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 | 0.098204 | 2.865502 | 1.115502 | 2.791807 | 1.041807 | 2.706292 | 0.956292 | 0 | 0 | 1 | 0 |
| 243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 | 0.159744 | 3.019872 | 0.019872 | 2.892630 | -0.107370 | 2.795297 | -0.204703 | 1 | 0 | 0 | 0 |
244 rows × 18 columns
reg3 = LinearRegression()
reg3.fit(df[['total_bill', 'Thur', 'Fri', 'Sat', 'Sun']], df['tip'])
# Show the parameters of reg3. This cell previously displayed reg.intercept_
# (the intercept of the single-feature model), so the intercept in the
# recorded output below is stale until the cell is re-run.
reg3.coef_, reg3.intercept_
(array([ 0.1046728 , -0.01132963, 0.00755391, -0.07843209, 0.08220781]), 0.9202696135546731)
df['regression_3'] = reg3.predict(df[['total_bill', 'Thur', 'Fri', 'Sat', 'Sun']])
df['regression_3 error'] = df['regression_3'] - df['tip']
np.abs(df[['regression_error', 'regression_3 error']]).describe()
| regression_error | regression_3 error | |
|---|---|---|
| count | 244.000000 | 244.000000 |
| mean | 0.745825 | 0.739488 |
| std | 0.694074 | 0.697760 |
| min | 0.002632 | 0.001820 |
| 25% | 0.276832 | 0.252676 |
| 50% | 0.541028 | 0.531382 |
| 75% | 0.999040 | 1.010272 |
| max | 3.743435 | 3.828128 |
reg.score(df[['total_bill']], df['tip'])
0.45661658635167657
reg3.score(df[['total_bill', 'Thur', 'Fri', 'Sat', 'Sun']], df['tip'])
0.45887402603123395
Upshot: The day data does not improve the model.
from sklearn.neighbors import KNeighborsRegressor
neigh = KNeighborsRegressor(n_neighbors=3)
neigh.fit(df[["total_bill"]], df["tip"])
KNeighborsRegressor(n_neighbors=3)
neigh.predict([[20]])
/Users/bb/opt/anaconda3/lib/python3.8/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but KNeighborsRegressor was fitted with feature names
array([3.50666667])
s = "Hello there! How are you?"
s.lower()
'hello there! how are you?'
s.upper()
'HELLO THERE! HOW ARE YOU?'
s.split()
['Hello', 'there!', 'How', 'are', 'you?']
s.split("e")
['H', 'llo th', 'r', '! How ar', ' you?']
pieces = s.split()
pieces
['Hello', 'there!', 'How', 'are', 'you?']
"----".join(pieces)
'Hello----there!----How----are----you?'
s.find("there")
6
s[6:]
'there! How are you?'
s.replace("re", "<RE>")
'Hello the<RE>! How a<RE> you?'
print("Hello\nthere\t!")
Hello there !
print("Hello\\nthere\\t!")
Hello\nthere\t!
print(r"Hello\nthere\t!")
Hello\nthere\t!
print("Hi \U0001F642")
Hi 🙂
print(r"Hi \U0001F642")
Hi \U0001F642
import re
From the course website:
sample = """I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
"""
import html
import re
from IPython.core.display import display, HTML
def re_show(regex, text="", flags=0):
    """
    Displays text with the regex matches highlighted.

    regex:
        Regular expression to search for. It is compiled unchanged, so
        numbered groups, backreferences and inline flags in the pattern
        keep their usual meaning.
    text:
        Text to be searched and displayed.
    flags:
        Flags passed to re.compile (e.g. re.I, re.M, re.S).
    """
    text_css = (
        "border-style: none; "
        "border-width: 0px; "
        "padding: 0px; "
        "font-size: 14px; "
        "color: darkslategray; "
        "background-color: white; "
        "white-space: pre; "
        "line-height: 20px;"
    )
    match_css = (
        "padding: 0px 1px 0px 1px; "
        "margin: 0px 0.5px 0px 0.5px; "
        "border-style: solid; "
        "border-width: 0.5px; "
        "border-color: darkred; "
        "background-color: cornsilk; "
        "color: red;"
    )
    open_tag = f'<span style="{match_css}">'

    pattern = re.compile(regex, flags=flags)

    # Assemble the HTML piece by piece instead of inserting placeholder
    # markers into the text: every fragment of the original text is escaped
    # exactly once, and text that happens to contain a marker string cannot
    # be mistaken for a match.
    pieces = []
    pos = 0
    for m in pattern.finditer(text):
        pieces.append(html.escape(text[pos:m.start()]))
        pieces.append(open_tag + html.escape(m.group(0)) + "</span>")
        pos = m.end()
    pieces.append(html.escape(text[pos:]))

    body = "".join(pieces)
    display(HTML(f'<code style="{text_css}">{body}</code>'))
re_show(r"the", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Alternative:
re_show(r"the|The", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Grouping patterns:
re_show(r"T|the", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"(T|t)he", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Character classes:
re_show(r"e.a", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"\w", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"\W", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Note matches of newline characters:
sample
'I am writing in reference to the our conversation on Tuesday, March 2, 2021.\nYour order of 100,000 units of GC-1344 microcontrollers has been confirmed.\nYou will be billed $19.50 per unit for the total of $1,950,000.00.\nThe expected delivery date is Friday, April 30, 2021. In case of any further\nquestions please contact our sales department by email support@simonis.biz,\nphone (294) 934-0923 or fax (294) 934-0202.\n\nDelfina Fischer\nManager, Simonis LLC\ndelfina.fisher@simonis.biz\n(294) 934 0937\n'
re_show(r"\d", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"[A-Za-z]", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"[aeiouy]", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"[^A-Za-z]", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"a[a-z]", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"\d\d\d", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Repetitions:
re_show(r"\d{3}", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"\d*", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"\d+", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"a\w?", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Greedy vs non-greedy matching:
re_show(r"Y.*u", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"Y.*?u", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Flags:
re_show(r"Y.*u", sample, flags=re.S)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"the", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show(r"the", sample, flags=re.I)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Anchors:
# Raw string: "\w" in a regular string literal is an invalid escape sequence.
re_show(r"^\w+", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
# Raw string: "\w" in a regular string literal is an invalid escape sequence.
re_show(r"\w+$", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
# Raw string: "\w" in a regular string literal is an invalid escape sequence.
re_show(r"^\w+", sample, flags=re.M)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
# Raw string: "\w" in a regular string literal is an invalid escape sequence.
re_show(r"\w+$", sample, flags=re.M)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
re_show("^.*2021.*$", sample, re.M)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Example. Match dollar amounts:
# Pattern: literal "$", one digit, any run of digits or commas, a literal ".",
# then exactly two digits.
re_show(r"\$\d[\d,]*\.\d{2}", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Example. Match phone numbers:
# Pattern: three digits in parentheses, a space, three digits, a space or hyphen
# separator, then four digits.
re_show(r"\(\d{3}\) \d{3}[ -]\d{4}", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Example. Match emails:
# Pattern: word characters/dots, "@", word characters/dots, and a final word
# character so the match cannot end on a ".".
re_show(r"[\w.]+@[\w.]+\w", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
Example. Match dates:
# Pattern: weekday name, ", ", month name, space, day number, ", ", four-digit year.
# Both name classes are letters only; a stray "-" inside the class would also
# accept hyphens in the month name.
re_show(r"[A-Za-z]+, [A-Za-z]+ \d+, \d{4}", sample)
I am writing in reference to the our conversation on Tuesday, March 2, 2021.
Your order of 100,000 units of GC-1344 microcontrollers has been confirmed.
You will be billed $19.50 per unit for the total of $1,950,000.00.
The expected delivery date is Friday, April 30, 2021. In case of any further
questions please contact our sales department by email support@simonis.biz,
phone (294) 934-0923 or fax (294) 934-0202.
Delfina Fischer
Manager, Simonis LLC
delfina.fisher@simonis.biz
(294) 934 0937
# With no groups in the pattern, findall returns the full text of each match.
re.findall(r"\$\d[\d,]*\.\d{2}", sample)
['$19.50', '$1,950,000.00']
Match groups:
# With one group, findall returns only the group's text (the amount without "$").
# The decimal point is escaped so it matches a literal "." rather than any character.
re.findall(r"\$(\d[\d,]*\.\d{2})", sample)
['19.50', '1,950,000.00']
# Raw string keeps `\d`, `\(` and `\)` intact for the regex engine (no invalid-escape warning).
# Matches "fax " followed by a run of digits, parentheses, spaces and hyphens.
re.findall(r"fax [\d\(\) -]+", sample)
['fax (294) 934-0202']
# Raw string keeps `\d`, `\(` and `\)` intact for the regex engine (no invalid-escape warning).
# The group makes findall return only the number that follows "fax ".
re.findall(r"fax ([\d\(\) -]+)", sample)
['(294) 934-0202']
# With several groups, findall returns one tuple of group texts per match.
re.findall(r"\((\d{3})\) (\d{3})[ -](\d{4})", sample)
[('294', '934', '0923'), ('294', '934', '0202'), ('294', '934', '0937')]
Non-capturing match groups:
# Raw string keeps `\w` intact for the regex engine (no invalid-escape warning).
# Both parenthesised parts capture, so each result is a (pronoun, next word) tuple.
re.findall(r"(you|your|our) (\w+)", sample, flags=re.I)
[('our', 'conversation'), ('Your', 'order'), ('You', 'will'), ('our', 'sales')]
# Raw string keeps `\w` intact for the regex engine (no invalid-escape warning).
# `(?:...)` groups the alternatives without capturing, so only the following word is returned.
re.findall(r"(?:you|your|our) (\w+)", sample, flags=re.I)
['conversation', 'order', 'will', 'sales']
# Raw string keeps `\$`, `\d` and `\.` intact for the regex engine (no invalid-escape warning).
# Every dollar amount in the text is replaced by the placeholder.
print(re.sub(r"\$\d[\d,]*\.\d{2}", "**AMOUNT**", sample))
I am writing in reference to the our conversation on Tuesday, March 2, 2021. Your order of 100,000 units of GC-1344 microcontrollers has been confirmed. You will be billed **AMOUNT** per unit for the total of **AMOUNT**. The expected delivery date is Friday, April 30, 2021. In case of any further questions please contact our sales department by email support@simonis.biz, phone (294) 934-0923 or fax (294) 934-0202. Delfina Fischer Manager, Simonis LLC delfina.fisher@simonis.biz (294) 934 0937
# The three digit runs of each phone number are captured as separate groups.
re.findall(r"\((\d{3})\) (\d{3})[ -](\d{4})", sample)
[('294', '934', '0923'), ('294', '934', '0202'), ('294', '934', '0937')]
# In the replacement, \1, \2 and \3 refer to the three captured digit groups,
# so each phone number is rewritten as the groups joined by dots.
print(re.sub(r"\((\d{3})\) (\d{3})[ -](\d{4})", r"\1.\2.\3", sample))
I am writing in reference to the our conversation on Tuesday, March 2, 2021. Your order of 100,000 units of GC-1344 microcontrollers has been confirmed. You will be billed $19.50 per unit for the total of $1,950,000.00. The expected delivery date is Friday, April 30, 2021. In case of any further questions please contact our sales department by email support@simonis.biz, phone 294.934.0923 or fax 294.934.0202. Delfina Fischer Manager, Simonis LLC delfina.fisher@simonis.biz 294.934.0937